{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c9253ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install the notebook's dependencies into the kernel's environment.\n",
    "# The requirement is quoted so shells that glob on square brackets (e.g. zsh)\n",
    "# pass the `[visualization]` extra through to pip unchanged.\n",
    "%pip install \"sf-hamilton[visualization]\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "62f66309",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "# Shows how to run the Spark pipeline. [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/apache/hamilton/blob/main/examples/LLM_Workflows/scraping_and_chunking/spark/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/scraping_and_chunking/spark/notebook.ipynb)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5ea50d85",
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Build a Hamilton driver from the two pipeline modules, visualize the\n",
    "# execution plan, then run it and show the resulting chunks.\n",
    "from hamilton import driver\n",
    "\n",
    "import doc_pipeline\n",
    "import spark_pipeline\n",
    "\n",
    "# Requested outputs and runtime inputs are defined once and shared by the\n",
    "# visualization and the execution so the two calls cannot drift apart.\n",
    "final_vars = [\"chunked_url_text\"]\n",
    "inputs = {\"app_name\": \"chunking_spark_job\", \"num_partitions\": 4}\n",
    "\n",
    "dr = (\n",
    "    driver.Builder()\n",
    "    .with_modules(doc_pipeline, spark_pipeline)\n",
    "    .with_config({})\n",
    "    .build()\n",
    ")\n",
    "dag = dr.visualize_execution(final_vars, inputs=inputs)\n",
    "result = dr.execute(final_vars, inputs=inputs)\n",
    "\n",
    "# Presumably a Spark DataFrame: its show() prints the rows itself and returns\n",
    "# None, so wrapping it in print() would only emit an extra \"None\" line.\n",
    "result[\"chunked_url_text\"].show()\n",
    "\n",
    "# Last expression of the cell: displays the object returned by\n",
    "# visualize_execution.\n",
    "dag"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
